# description of data is here:
# http://www.stat.columbia.edu/~gelman/book/data/presidential.asc
# Load the presidential-election data (see header link for the codebook).
electiondata <- read.table("election.txt", header = TRUE)
namescol <- names(electiondata)
# Fig 1: visualize response and covariates; each histogram is labelled with
# the rounded five-number summary of its column.
for (j in seq_len(ncol(electiondata))) {
  hist(electiondata[, j], main = namescol[j],
       xlab = toString(round(summary(electiondata[, j]), 3)), ylab = "")
}
# Rows containing at least one missing value.  rowSums(is.na(...)) is
# vectorized and avoids apply() coercing the data frame row by row.
idxNA <- which(rowSums(is.na(electiondata)) > 0)
print(electiondata[idxNA, ])
## Dvote year state evotes constant n1 n2 n3 n4 s1 s2
## 251 0.260 1972 1 9 1 0.3711 -0.62 -0.62 -2.17 0.077 NA
## 301 0.573 1968 1 10 1 0.4189 0.49 0.00 1.30 NA 0.073
## 324 0.630 1968 24 7 1 0.4189 0.49 0.00 1.30 -0.484 NA
## 351 NA 1964 1 10 1 0.6915 0.74 0.74 1.12 0.073 0.167
## 352 0.659 1964 2 3 1 0.6915 0.74 0.74 1.12 -0.010 NA
## 361 0.788 1964 11 4 1 0.6915 0.74 0.74 1.12 -0.001 NA
## 374 0.129 1964 24 7 1 0.6915 0.74 0.74 1.12 NA 0.282
## 402 0.491 1960 2 3 1 0.4946 -0.68 0.00 -0.39 NA NA
## 411 0.500 1960 11 3 1 0.4946 -0.68 0.00 -0.39 NA NA
## 424 NA 1960 24 8 1 0.4946 -0.68 0.00 -0.39 0.282 0.158
## 451 0.589 1956 1 11 1 0.4409 -0.69 -0.69 -0.46 0.178 NA
## 453 0.534 1956 4 8 1 0.4409 -0.69 -0.69 -0.46 0.114 NA
## 458 0.427 1956 9 10 1 0.4409 -0.69 -0.69 -0.46 0.004 NA
## 459 0.666 1956 10 12 1 0.4409 -0.69 -0.69 -0.46 0.251 NA
## 466 0.426 1956 18 10 1 0.4409 -0.69 -0.69 -0.46 0.083 NA
## 472 0.704 1956 24 8 1 0.4409 -0.69 -0.69 -0.46 0.158 NA
## 481 0.507 1956 33 14 1 0.4409 -0.69 -0.69 -0.46 0.093 NA
## 488 0.643 1956 40 8 1 0.4409 -0.69 -0.69 -0.46 0.061 NA
## 490 0.497 1956 42 11 1 0.4409 -0.69 -0.69 -0.46 0.052 NA
## 491 0.443 1956 43 24 1 0.4409 -0.69 -0.69 -0.46 0.088 NA
## 494 0.409 1956 46 12 1 0.4409 -0.69 -0.69 -0.46 -0.011 NA
## 499 0.649 1952 1 11 1 0.4211 0.28 0.00 -0.24 NA 0.279
## 501 0.560 1952 4 8 1 0.4211 0.28 0.00 -0.24 NA 0.163
## 506 0.450 1952 9 10 1 0.4211 0.28 0.00 -0.24 NA 0.165
## 507 0.697 1952 10 12 1 0.4211 0.28 0.00 -0.24 NA 0.288
## 514 0.529 1952 18 10 1 0.4211 0.28 0.00 -0.24 NA 0.268
## 520 0.604 1952 24 8 1 0.4211 0.28 0.00 -0.24 NA 0.398
## 529 0.539 1952 33 14 1 0.4211 0.28 0.00 -0.24 NA 0.129
## 536 0.507 1952 40 8 1 0.4211 0.28 0.00 -0.24 NA 0.413
## 538 0.498 1952 42 11 1 0.4211 0.28 0.00 -0.24 NA 0.069
## 539 0.468 1952 43 24 1 0.4211 0.28 0.00 -0.24 NA 0.272
## 542 0.435 1952 46 12 1 0.4211 0.28 0.00 -0.24 NA 0.087
## 547 NA 1948 1 11 1 0.4561 0.39 0.39 1.78 0.279 0.306
## s3 s4 s5 s6 s7 s8 s9 r1 r2 r3
## 251 0 0 0.4811321 -1.23101099 -0.5169 -0.21445981 0.00000000 0 0 0
## 301 0 0 0.5000000 -1.08320756 -0.5169 -0.23250000 0.00000000 0 0 0
## 324 0 0 0.4836066 -0.08320756 -0.7610 -0.23250000 0.00000000 0 0 0
## 351 0 0 0.4811321 2.44829330 -0.5169 0.31750000 0.00000000 0 -1 -1
## 352 0 0 0.0000000 3.44829330 0.2181 0.14523025 0.00000000 0 0 0
## 361 0 0 0.2843137 0.84829330 0.5367 0.31750000 0.00000000 0 0 0
## 374 0 0 0.5000000 0.94829330 -0.7610 0.31750000 0.00000000 0 -1 -1
## 402 0 0 0.3500000 -2.00906169 0.2181 0.03773025 -0.06447399 0 0 0
## 411 0 0 0.1470588 -5.20906169 0.5367 0.14500000 0.10097422 0 0 0
## 424 0 0 0.5000000 3.29093831 -0.7610 0.14500000 -0.19114162 0 0 0
## 451 0 0 0.5000000 -4.75645805 -0.5169 -0.08458906 0.00000000 0 0 0
## 453 0 0 0.4797980 -2.65645805 -0.2121 0.13500000 0.00000000 0 0 0
## 458 0 0 0.4368421 -8.15645805 -0.3438 0.06126786 0.00000000 0 0 0
## 459 0 0 0.4853659 -1.35645805 -0.4633 0.13500000 0.00000000 0 0 0
## 466 0 0 0.5000000 -2.35645805 -0.3767 0.13500000 0.00000000 0 0 0
## 472 0 0 0.5000000 -2.55645805 -0.7610 0.13500000 0.00000000 0 0 0
## 481 0 0 0.4243697 0.04354195 -0.5367 0.13500000 0.00000000 0 0 0
## 488 0 0 0.5000000 3.44354195 -0.5476 0.13500000 0.00000000 0 0 0
## 490 0 1 0.3061224 0.24354195 -0.2737 0.13500000 0.00000000 0 0 0
## 491 -1 0 0.5000000 1.24354195 -0.2663 0.13500000 0.00000000 0 0 0
## 494 0 0 0.4400000 1.54354195 -0.6992 -0.02295759 0.00000000 0 0 0
## 499 0 1 0.4905660 0.21605450 -0.5169 -0.09708906 0.00000000 0 0 0
## 501 0 0 0.4800000 -6.08394550 -0.2121 0.15250000 0.00000000 0 0 0
## 506 0 0 0.4684211 1.51605450 -0.3438 0.04876786 0.00000000 0 0 0
## 507 0 0 0.4951220 3.81605450 -0.4633 0.15250000 0.00000000 0 0 0
## 514 0 0 0.5000000 1.81605450 -0.3767 0.15250000 0.00000000 0 0 0
## 520 0 0 0.5000000 0.31605450 -0.7610 0.15250000 0.00000000 0 0 0
## 529 0 0 0.4250000 -1.58394550 -0.5367 0.15250000 0.00000000 0 0 0
## 536 0 0 0.5000000 7.01605450 -0.5476 0.15250000 0.00000000 0 0 0
## 538 0 0 0.3080808 -3.88394550 -0.2737 0.13418020 0.00000000 0 0 0
## 539 -1 0 0.4933333 4.81605450 -0.2663 0.15250000 0.00000000 0 0 0
## 542 0 0 0.4285714 2.31605450 -0.6992 -0.03545759 0.00000000 0 0 0
## 547 0 0 0.4905660 -0.47588378 -0.5169 -0.14708906 0.00000000 0 0 0
## r4 r5 r6
## 251 0 0 0
## 301 0 0 0
## 324 0 0 0
## 351 0 0 0
## 352 0 0 0
## 361 0 0 0
## 374 0 0 0
## 402 0 0 0
## 411 0 0 0
## 424 0 0 0
## 451 0 0 0
## 453 0 0 0
## 458 0 0 0
## 459 0 0 0
## 466 0 0 0
## 472 0 0 0
## 481 0 0 0
## 488 0 0 0
## 490 0 0 0
## 491 0 0 0
## 494 0 0 0
## 499 0 0 0
## 501 0 0 0
## 506 0 0 0
## 507 0 0 0
## 514 0 0 0
## 520 0 0 0
## 529 0 0 0
## 536 0 0 0
## 538 0 0 0
## 539 0 0 0
## 542 0 0 0
## 547 0 0 0
# Drop the rows with missing values.  setdiff() is safe even when idxNA is
# empty, unlike negative indexing with integer(0), which would drop all rows.
electiondata <- electiondata[setdiff(seq_len(nrow(electiondata)), idxNA), ]
# Response: Democratic share of the vote
Y <- electiondata$Dvote
# Predictors: all remaining columns
X <- electiondata[, 2:ncol(electiondata)]
library(ggplot2)
# Scatter each covariate against the response.
for (j in 2:ncol(electiondata)) {
  plot(electiondata[, j], electiondata$Dvote,
       xlab = names(electiondata)[j], ylab = "Dvote")
}
# Does Dvote in one election predict Dvote in the next?  Plot consecutive
# election years against each other.  Equal vector lengths is only a proxy
# for the state sets actually matching; a matched-state comparison appears
# later in the script.
years_uniq <- sort(unique(electiondata$year))
for (j in seq_len(length(years_uniq) - 1)) {
  prev <- electiondata$Dvote[electiondata$year == years_uniq[j]]
  new <- electiondata$Dvote[electiondata$year == years_uniq[j + 1]]
  if (length(prev) == length(new)) {
    plot(prev, new, xlab = years_uniq[j], ylab = years_uniq[j + 1], main = '')
  }
}
# Ordinary least squares of Dvote on all predictors.  The all-ones
# "constant" column is collinear with the intercept, so lm() reports its
# coefficient as NA ("1 not defined because of singularities").
linearregressionresult <- lm(Y ~ as.matrix(X))
summary(linearregressionresult)
##
## Call:
## lm(formula = Y ~ as.matrix(X))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.094012 -0.024325 -0.000339 0.023995 0.113614
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.001e+00 3.006e-01 -3.330 0.000927 ***
## as.matrix(X)year 6.588e-04 1.547e-04 4.258 2.44e-05 ***
## as.matrix(X)state 2.805e-05 1.092e-04 0.257 0.797284
## as.matrix(X)evotes 4.304e-04 1.791e-04 2.404 0.016566 *
## as.matrix(X)constant NA NA NA NA
## as.matrix(X)n1 3.784e-01 2.601e-02 14.548 < 2e-16 ***
## as.matrix(X)n2 -1.397e-02 5.782e-03 -2.416 0.016011 *
## as.matrix(X)n3 4.879e-02 7.866e-03 6.203 1.11e-09 ***
## as.matrix(X)n4 2.839e-02 1.857e-03 15.285 < 2e-16 ***
## as.matrix(X)s1 3.047e-01 3.582e-02 8.508 < 2e-16 ***
## as.matrix(X)s2 2.633e-01 2.878e-02 9.148 < 2e-16 ***
## as.matrix(X)s3 4.587e-02 8.119e-03 5.650 2.61e-08 ***
## as.matrix(X)s4 1.619e-02 7.873e-03 2.056 0.040290 *
## as.matrix(X)s5 3.826e-02 9.907e-03 3.862 0.000126 ***
## as.matrix(X)s6 1.181e-03 4.242e-04 2.783 0.005574 **
## as.matrix(X)s7 3.042e-02 5.588e-03 5.443 7.96e-08 ***
## as.matrix(X)s8 3.780e-02 1.758e-02 2.150 0.032026 *
## as.matrix(X)s9 1.226e-01 4.296e-02 2.854 0.004483 **
## as.matrix(X)r1 5.518e-02 7.695e-03 7.171 2.48e-12 ***
## as.matrix(X)r2 8.827e-02 1.624e-02 5.435 8.34e-08 ***
## as.matrix(X)r3 1.823e-01 2.585e-02 7.055 5.35e-12 ***
## as.matrix(X)r4 6.847e-02 1.217e-02 5.627 2.95e-08 ***
## as.matrix(X)r5 6.834e-02 1.277e-02 5.351 1.29e-07 ***
## as.matrix(X)r6 6.145e-02 9.229e-03 6.658 6.86e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03617 on 538 degrees of freedom
## Multiple R-squared: 0.8622, Adjusted R-squared: 0.8566
## F-statistic: 153 on 22 and 538 DF, p-value: < 2.2e-16
# Fitted vs observed; the red segment is the identity line drawn between
# the 1% and 99% quantiles of Y.
plot(linearregressionresult$fitted.values, Y)
refq <- quantile(Y, c(0.01, 0.99))
lines(refq, refq, col = "red")
# Rstan (via rstanarm).  library() errors immediately if the package is
# missing, whereas require() only returns FALSE and lets the script limp on.
library(rstanarm)
## Loading required package: rstanarm
## Loading required package: Rcpp
## rstanarm (Version 2.18.2, packaged: 2018-11-08 22:19:38 UTC)
## - Do not expect the default priors to remain the same in future rstanarm versions.
## Thus, R scripts should specify priors explicitly, even if they are just the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
## - Plotting theme set to bayesplot::theme_default().
# Bayesian linear regression with rstanarm's default priors.  The "- 1"
# removes R's intercept so the data's own all-ones "constant" column plays
# that role instead.
postlinearmodel <- stan_glm(
  Dvote ~ . - 1,
  data = electiondata,
  family = gaussian(link = "identity")
)
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000697 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 6.97 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 87.9119 seconds (Warm-up)
## Chain 1: 61.1866 seconds (Sampling)
## Chain 1: 149.099 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 8.3e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.83 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 91.21 seconds (Warm-up)
## Chain 2: 62.8687 seconds (Sampling)
## Chain 2: 154.079 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 7.7e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.77 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 70.4719 seconds (Warm-up)
## Chain 3: 61.5945 seconds (Sampling)
## Chain 3: 132.066 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 7.5e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.75 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 84.4976 seconds (Warm-up)
## Chain 4: 61.0738 seconds (Sampling)
## Chain 4: 145.571 seconds (Total)
## Chain 4:
# Posterior summaries: means, quantiles, plus mcse / Rhat / n_eff diagnostics.
summary(postlinearmodel)
##
## Model Info:
##
## function: stan_glm
## family: gaussian [identity]
## formula: Dvote ~ . - 1
## algorithm: sampling
## priors: see help('prior_summary')
## sample: 4000 (posterior sample size)
## observations: 561
## predictors: 23
##
## Estimates:
## mean sd 2.5% 25% 50% 75% 97.5%
## year 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## state 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## evotes 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## constant -0.4 0.2 -0.7 -0.5 -0.4 -0.3 0.0
## n1 0.4 0.0 0.3 0.4 0.4 0.4 0.4
## n2 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## n3 0.0 0.0 0.0 0.0 0.0 0.1 0.1
## n4 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## s1 0.3 0.0 0.2 0.3 0.3 0.3 0.4
## s2 0.3 0.0 0.2 0.2 0.3 0.3 0.3
## s3 0.0 0.0 0.0 0.0 0.0 0.1 0.1
## s4 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## s5 0.0 0.0 0.0 0.0 0.0 0.0 0.1
## s6 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## s7 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## s8 0.0 0.0 0.0 0.0 0.0 0.1 0.1
## s9 0.1 0.0 0.0 0.1 0.1 0.2 0.2
## r1 0.1 0.0 0.0 0.1 0.1 0.1 0.1
## r2 0.1 0.0 0.1 0.1 0.1 0.1 0.1
## r3 0.2 0.0 0.1 0.2 0.2 0.2 0.2
## r4 0.1 0.0 0.0 0.1 0.1 0.1 0.1
## r5 0.1 0.0 0.0 0.1 0.1 0.1 0.1
## r6 0.1 0.0 0.0 0.1 0.1 0.1 0.1
## sigma 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## mean_PPD 0.5 0.0 0.5 0.5 0.5 0.5 0.5
## log-posterior 1039.2 3.6 1031.3 1036.9 1039.5 1041.7 1045.1
##
## Diagnostics:
## mcse Rhat n_eff
## year 0.0 1.0 2703
## state 0.0 1.0 7401
## evotes 0.0 1.0 5229
## constant 0.0 1.0 2794
## n1 0.0 1.0 3038
## n2 0.0 1.0 3375
## n3 0.0 1.0 3168
## n4 0.0 1.0 3564
## s1 0.0 1.0 3681
## s2 0.0 1.0 4944
## s3 0.0 1.0 5584
## s4 0.0 1.0 7240
## s5 0.0 1.0 3857
## s6 0.0 1.0 6616
## s7 0.0 1.0 4315
## s8 0.0 1.0 3575
## s9 0.0 1.0 5303
## r1 0.0 1.0 4501
## r2 0.0 1.0 3507
## r3 0.0 1.0 4089
## r4 0.0 1.0 5094
## r5 0.0 1.0 6088
## r6 0.0 1.0 5032
## sigma 0.0 1.0 5517
## mean_PPD 0.0 1.0 3688
## log-posterior 0.1 1.0 1388
##
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# Posterior draws as a matrix: one row per draw; 23 coefficient columns
# followed by sigma.
posteriorsampleslm <- as.matrix(postlinearmodel)
coeflm <- as.numeric(linearregressionresult$coefficients)
# lm() left the "constant" coefficient (slot 5: intercept, year, state,
# evotes, constant, ...) as NA; substitute the lm intercept there so the
# layout matches the no-intercept Bayesian fit, then drop the leading
# intercept slot with [-1].
coeflm[5] <- coeflm[1]
# Side-by-side comparison: posterior means vs rearranged OLS estimates.
cbind(coef(postlinearmodel), coeflm[-1])
## [,1] [,2]
## year 3.414303e-04 6.587634e-04
## state 1.851498e-05 2.805306e-05
## evotes 4.147651e-04 4.304138e-04
## constant -3.844254e-01 -1.001246e+00
## n1 3.941203e-01 3.784376e-01
## n2 -1.284045e-02 -1.397224e-02
## n3 4.525827e-02 4.878971e-02
## n4 2.686466e-02 2.838677e-02
## s1 2.959038e-01 3.047399e-01
## s2 2.538682e-01 2.632691e-01
## s3 4.491983e-02 4.587272e-02
## s4 1.644348e-02 1.618528e-02
## s5 4.077731e-02 3.825868e-02
## s6 1.140069e-03 1.180717e-03
## s7 2.999306e-02 3.041943e-02
## s8 4.626688e-02 3.779558e-02
## s9 1.298037e-01 1.226186e-01
## r1 5.538342e-02 5.518309e-02
## r2 9.152500e-02 8.826796e-02
## r3 1.806605e-01 1.823343e-01
## r4 6.566946e-02 6.847465e-02
## r5 6.737347e-02 6.833733e-02
## r6 6.156956e-02 6.144558e-02
# Refit OLS without year/state/evotes (the first three columns of X); the
# "constant" column is still collinear with the intercept and comes out NA.
linearregressionresult2 <- lm(Y ~ as.matrix(X)[, -(1:3)])
summary(linearregressionresult2)
##
## Call:
## lm(formula = Y ~ as.matrix(X)[, -(1:3)])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.101423 -0.024965 -0.000357 0.024100 0.115080
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2832418 0.0121572 23.298 < 2e-16 ***
## as.matrix(X)[, -(1:3)]constant NA NA NA NA
## as.matrix(X)[, -(1:3)]n1 0.4151813 0.0249152 16.664 < 2e-16 ***
## as.matrix(X)[, -(1:3)]n2 -0.0118705 0.0058524 -2.028 0.043020 *
## as.matrix(X)[, -(1:3)]n3 0.0415290 0.0077887 5.332 1.43e-07 ***
## as.matrix(X)[, -(1:3)]n4 0.0250209 0.0017138 14.600 < 2e-16 ***
## as.matrix(X)[, -(1:3)]s1 0.2956735 0.0362343 8.160 2.36e-15 ***
## as.matrix(X)[, -(1:3)]s2 0.2449690 0.0289011 8.476 < 2e-16 ***
## as.matrix(X)[, -(1:3)]s3 0.0400832 0.0080632 4.971 8.95e-07 ***
## as.matrix(X)[, -(1:3)]s4 0.0146022 0.0079497 1.837 0.066785 .
## as.matrix(X)[, -(1:3)]s5 0.0404499 0.0099340 4.072 5.36e-05 ***
## as.matrix(X)[, -(1:3)]s6 0.0011482 0.0004312 2.663 0.007981 **
## as.matrix(X)[, -(1:3)]s7 0.0298943 0.0056833 5.260 2.08e-07 ***
## as.matrix(X)[, -(1:3)]s8 0.0572498 0.0174137 3.288 0.001076 **
## as.matrix(X)[, -(1:3)]s9 0.1489682 0.0433676 3.435 0.000638 ***
## as.matrix(X)[, -(1:3)]r1 0.0573191 0.0077921 7.356 7.07e-13 ***
## as.matrix(X)[, -(1:3)]r2 0.0919283 0.0164507 5.588 3.64e-08 ***
## as.matrix(X)[, -(1:3)]r3 0.1859974 0.0262716 7.080 4.50e-12 ***
## as.matrix(X)[, -(1:3)]r4 0.0594448 0.0122382 4.857 1.56e-06 ***
## as.matrix(X)[, -(1:3)]r5 0.0678408 0.0129952 5.220 2.55e-07 ***
## as.matrix(X)[, -(1:3)]r6 0.0641587 0.0093380 6.871 1.76e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03684 on 541 degrees of freedom
## Multiple R-squared: 0.8562, Adjusted R-squared: 0.8512
## F-statistic: 169.6 on 19 and 541 DF, p-value: < 2.2e-16
# Fitted vs observed for the reduced model, with the identity reference
# segment drawn over the central 98% of Y.
plot(linearregressionresult2$fitted.values, Y)
lines(quantile(Y, c(0.01, 0.99)), quantile(Y, c(0.01, 0.99)), col = "red")
# library() fails loudly if rstanarm is unavailable, unlike require().
library(rstanarm)
# Bayesian version of the reduced model.  R's intercept is kept here; the
# all-ones "constant" term is dropped as an empty level (see warning below).
postlinearmodel2 <- stan_glm(
  Dvote ~ constant + n1 + n2 + n3 + n4 + s1 + s2 + s3 + s4 + s5 + s6 + s7 +
    s8 + s9 + r1 + r2 + r3 + r4 + r5 + r6,
  data = electiondata,
  family = gaussian(link = "identity")
)
## Warning in center_x(x, sparse): Dropped empty interaction levels: constant
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000236 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 2.36 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 2.00273 seconds (Warm-up)
## Chain 1: 1.44319 seconds (Sampling)
## Chain 1: 3.44593 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 6.6e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.66 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 1.89684 seconds (Warm-up)
## Chain 2: 0.945545 seconds (Sampling)
## Chain 2: 2.84238 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 7.3e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.73 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 1.79661 seconds (Warm-up)
## Chain 3: 1.0029 seconds (Sampling)
## Chain 3: 2.7995 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 7.5e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.75 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 2.01573 seconds (Warm-up)
## Chain 4: 0.976628 seconds (Sampling)
## Chain 4: 2.99236 seconds (Total)
## Chain 4:
# Posterior summaries and diagnostics for the reduced model.
summary(postlinearmodel2)
##
## Model Info:
##
## function: stan_glm
## family: gaussian [identity]
## formula: Dvote ~ constant + n1 + n2 + n3 + n4 + s1 + s2 + s3 + s4 + s5 +
## s6 + s7 + s8 + s9 + r1 + r2 + r3 + r4 + r5 + r6
## algorithm: sampling
## priors: see help('prior_summary')
## sample: 4000 (posterior sample size)
## observations: 561
## predictors: 21
##
## Estimates:
## mean sd 2.5% 25% 50% 75% 97.5%
## (Intercept) 0.3 0.0 0.3 0.3 0.3 0.3 0.3
## n1 0.4 0.0 0.4 0.4 0.4 0.4 0.5
## n2 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## n3 0.0 0.0 0.0 0.0 0.0 0.0 0.1
## n4 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## s1 0.3 0.0 0.2 0.3 0.3 0.3 0.4
## s2 0.2 0.0 0.2 0.2 0.2 0.3 0.3
## s3 0.0 0.0 0.0 0.0 0.0 0.0 0.1
## s4 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## s5 0.0 0.0 0.0 0.0 0.0 0.0 0.1
## s6 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## s7 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## s8 0.1 0.0 0.0 0.0 0.1 0.1 0.1
## s9 0.1 0.0 0.1 0.1 0.1 0.2 0.2
## r1 0.1 0.0 0.0 0.1 0.1 0.1 0.1
## r2 0.1 0.0 0.1 0.1 0.1 0.1 0.1
## r3 0.2 0.0 0.1 0.2 0.2 0.2 0.2
## r4 0.1 0.0 0.0 0.1 0.1 0.1 0.1
## r5 0.1 0.0 0.0 0.1 0.1 0.1 0.1
## r6 0.1 0.0 0.0 0.1 0.1 0.1 0.1
## sigma 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## mean_PPD 0.5 0.0 0.5 0.5 0.5 0.5 0.5
## log-posterior 1035.2 3.2 1028.1 1033.3 1035.6 1037.6 1040.7
##
## Diagnostics:
## mcse Rhat n_eff
## (Intercept) 0.0 1.0 3360
## n1 0.0 1.0 3289
## n2 0.0 1.0 2839
## n3 0.0 1.0 3077
## n4 0.0 1.0 3085
## s1 0.0 1.0 3213
## s2 0.0 1.0 4103
## s3 0.0 1.0 6057
## s4 0.0 1.0 5568
## s5 0.0 1.0 3341
## s6 0.0 1.0 6510
## s7 0.0 1.0 3665
## s8 0.0 1.0 3117
## s9 0.0 1.0 4230
## r1 0.0 1.0 3820
## r2 0.0 1.0 3672
## r3 0.0 1.0 3533
## r4 0.0 1.0 3912
## r5 0.0 1.0 4726
## r6 0.0 1.0 3813
## sigma 0.0 1.0 5148
## mean_PPD 0.0 1.0 4992
## log-posterior 0.1 1.0 1672
##
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# Posterior draws for the reduced model: 20 coefficient columns (intercept
# plus 19 predictors) followed by sigma.
posteriorsampleslm2 <- as.matrix(postlinearmodel2)
coeflm2 <- as.numeric(linearregressionresult2$coefficients)
# lm() left "constant" (slot 2) as NA; overwrite it with the intercept so
# positions line up with the Bayesian fit, then drop the duplicate first
# slot with [-1].
coeflm2[2] <- coeflm2[1]
# Posterior means vs rearranged OLS estimates, side by side.
cbind(coef(postlinearmodel2)[1:20], coeflm2[-1])
## [,1] [,2]
## (Intercept) 0.283152176 0.283241772
## n1 0.415314973 0.415181305
## n2 -0.012039292 -0.011870470
## n3 0.041399567 0.041529023
## n4 0.025079846 0.025020909
## s1 0.295345496 0.295673462
## s2 0.245190699 0.244968991
## s3 0.040091186 0.040083235
## s4 0.015208162 0.014602152
## s5 0.040263185 0.040449916
## s6 0.001139342 0.001148189
## s7 0.029693174 0.029894283
## s8 0.057265716 0.057249766
## s9 0.149750055 0.148968199
## r1 0.057097953 0.057319109
## r2 0.092486481 0.091928284
## r3 0.184502348 0.185997421
## r4 0.059437167 0.059444787
## r5 0.067463586 0.067840815
## r6 0.064119824 0.064158736
# Estimated sigma (residual s.d.).  Column 21 of posteriorsampleslm2 and
# column 24 of posteriorsampleslm hold the sigma draws (the column after
# the coefficients).
summary(posteriorsampleslm2[, 21])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.03306 0.03613 0.03688 0.03690 0.03766 0.04136
summary(posteriorsampleslm[, 24])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.03254 0.03557 0.03630 0.03633 0.03705 0.04083
# Posterior correlations of parameters, shown as a heat map.
library(ggplot2)
library(reshape2)
ggplot(data = melt(cov2cor(vcov(postlinearmodel))), aes(x=Var1, y=Var2, fill=value)) +
geom_tile() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Posterior predictive quantities for the full model.
years <- sort(unique(electiondata$year))
# Fitted values per draw (draws x observations); computed once instead of
# three times.
fittedlm <- posteriorsampleslm[, 1:23] %*% t(X)
ndraws <- nrow(posteriorsampleslm)
# BUG FIX: the original rnorm(4000) drew ONE noise value per posterior draw
# and recycled it across all observations, making predictive noise perfectly
# correlated within a draw.  Predictive noise must be independent per
# observation.  Multiplying the noise matrix by the length-ndraws sigma
# vector recycles column-wise, scaling row i (draw i) by its own sigma draw.
ypred <- fittedlm +
  matrix(rnorm(ndraws * ncol(fittedlm)), nrow = ndraws) * posteriorsampleslm[, 24]
# Examine residuals: pure predictive noise vs observed residuals per draw.
residuepred <- ypred - fittedlm
residueobs <- t(apply(fittedlm, 1, function(z) Y - z))
par(mfrow = c(1, 2))
hist(residueobs, xlim = c(-0.15, 0.15), freq = FALSE)
hist(residuepred, xlim = c(-0.15, 0.15), freq = FALSE)
# Per-year posterior predictive checks: distributions of the predictive
# mean and variance against the observed values (red lines).
par(mfrow = c(1, 2))
for (j in seq_along(years)) {
  idx <- which(electiondata$year == years[j])
  hist(apply(ypred[, idx], 1, mean), xlab = "mean", freq = FALSE, main = years[j])
  abline(v = mean(Y[idx]), col = "red")
  hist(apply(ypred[, idx], 1, var), xlab = "var", freq = FALSE, main = years[j])
  abline(v = var(Y[idx]), col = "red")
}
# Posterior correlations of parameters for the reduced model.
ggplot(data = melt(cov2cor(vcov(postlinearmodel2))), aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Posterior predictive quantities for the reduced model.  Columns 4:23 of X
# (constant, n1..n4, s1..s9, r1..r6) pair with the 20 posterior coefficient
# columns (intercept multiplies the all-ones constant column).
years <- sort(unique(electiondata$year))
fittedlm2 <- posteriorsampleslm2[, 1:20] %*% t(X[4:23])
ndraws2 <- nrow(posteriorsampleslm2)
# BUG FIX: draw independent noise per observation instead of recycling one
# rnorm(4000) value per draw across all observations (see full-model PPC).
ypred2 <- fittedlm2 +
  matrix(rnorm(ndraws2 * ncol(fittedlm2)), nrow = ndraws2) * posteriorsampleslm2[, 21]
residuepred2 <- ypred2 - fittedlm2
residueobs2 <- t(apply(fittedlm2, 1, function(z) Y - z))
par(mfrow = c(1, 2))
hist(residueobs2, xlim = c(-0.15, 0.15), freq = FALSE)
hist(residuepred2, xlim = c(-0.15, 0.15), freq = FALSE)
# Per-year posterior predictive checks for the reduced model.
par(mfrow = c(1, 2))
for (j in seq_along(years)) {
  idx <- which(electiondata$year == years[j])
  hist(apply(ypred2[, idx], 1, mean), xlab = "mean", freq = FALSE, main = years[j])
  abline(v = mean(Y[idx]), col = "red")
  hist(apply(ypred2[, idx], 1, var), xlab = "variance", freq = FALSE, main = years[j])
  abline(v = var(Y[idx]), col = "red")
}
# Does last election's result predict this one?  Restrict both index sets
# to the states present in both consecutive elections before plotting, so
# the paired vectors line up state by state.
par(mfrow = c(2, 2))
for (k in seq_along(years)[-1]) {
  idxcurrent <- which(electiondata$year == years[k])
  idxpast <- which(electiondata$year == years[k - 1])
  commonstates <- intersect(electiondata$state[idxcurrent], electiondata$state[idxpast])
  # Logical subsetting directly; wrapping %in% in which() was redundant.
  idxcurrent <- idxcurrent[electiondata$state[idxcurrent] %in% commonstates]
  idxpast <- idxpast[electiondata$state[idxpast] %in% commonstates]
  plot(Y[idxpast], Y[idxcurrent], ylab = years[k], xlab = years[k - 1])
}
# Do the other states' results track the current state's result?
# Rmat[k, j, ] holds (state k's mean, other states' mean) for year j.
states <- sort(unique(electiondata$state))
Rmat <- array(NA, c(length(states), length(years), 2))
for (k in seq_along(states)) {
  for (j in seq_along(years)) {
    # which(a & b) is the direct form of intersect(which(a), which(b)).
    idxcs <- which(electiondata$year == years[j] & electiondata$state == states[k])
    idxos <- setdiff(which(electiondata$year == years[j]), idxcs)
    if (length(idxcs) > 0 && length(idxos) > 0) {
      Rmat[k, j, ] <- c(mean(Y[idxcs]), mean(Y[idxos]))
    }
  }
}
par(mfrow = c(3, 3))
for (i in seq_along(states)) {
  plot(Rmat[i, , 1], Rmat[i, , 2])
}
# Expand Model: a varying-coefficient (regional) model.
# Reshape the long data into state x year arrays for Stan.
# NOTE(review): indexing Ymat/Xmat rows by the raw state codes assumes the
# codes run 1..length(states) with no gaps -- confirm against the data.
Ymat <- array(0, c(length(states), length(years)))
Xmat <- array(0, c(length(states), length(years), 14))
for (i in seq_along(years)) {
  idx <- which(electiondata$year == years[i])
  Ymat[electiondata$state[idx], i] <- Y[idx]
  # Columns 5:18 of electiondata (constant, n1..n4, s1..s9) are the 14
  # predictors used by the regional model.
  for (j in seq_len(14)) {
    Xmat[electiondata$state[idx], i, j] <- electiondata[idx, j + 4]
  }
}
# Indicator of Southern states (by state code).
Southlabel <- rep(0, length(states))
SouthStates <- c(1, 4, 9, 10, 17, 18, 24, 33, 36, 40, 42, 43, 46)
Southlabel[SouthStates] <- 1
InputDataRegion <- list(Nt = length(years), Ns = length(states), p = 14,
                        Y = Ymat, X = Xmat, Southlabel = Southlabel)
# library() errors if rstan is missing, rather than require()'s FALSE.
library(rstan)
# Fit the regional varying-coefficient Stan model.
samplesRegionalModel <- stan(file = "RegionElection.stan", data = InputDataRegion)
##
## SAMPLING FOR MODEL 'RegionElection' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000318 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 3.18 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 133.834 seconds (Warm-up)
## Chain 1: 158.751 seconds (Sampling)
## Chain 1: 292.585 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'RegionElection' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000144 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.44 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 133.466 seconds (Warm-up)
## Chain 2: 158.844 seconds (Sampling)
## Chain 2: 292.31 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'RegionElection' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.000205 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 2.05 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 138.68 seconds (Warm-up)
## Chain 3: 159.796 seconds (Sampling)
## Chain 3: 298.476 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'RegionElection' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 0.000155 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.55 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 143.895 seconds (Warm-up)
## Chain 4: 165.493 seconds (Sampling)
## Chain 4: 309.388 seconds (Total)
## Chain 4:
## Warning: There were 4000 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
# Extract posterior draws as a named list of arrays.
samplesRM <- extract(samplesRegionalModel)
# Region index per observation: 1 = non-South, 2 = South.
# NOTE(review): assumes state codes directly index Southlabel -- confirm.
whichgammas <- Southlabel[electiondata$state] + 1
# Per-observation draws of the regional effect gamma[region, year]
# (sapply builds a draws x observations matrix).
gammasamples <- sapply(1:length(whichgammas), function(j){
samplesRM$gamma[, whichgammas[j], which(electiondata$year[j] == years)]
})
# Per-observation draws of the national year effect delta[year].
deltasamples <- sapply(1:length(whichgammas), function(j){
samplesRM$delta[, which(electiondata$year[j] == years)]
})
# Fixed-effect contribution: columns 5:18 of electiondata are the same 14
# predictors packed into Xmat above.
Xbetasamples <- samplesRM$beta %*% t(electiondata[,5:18])
# calculate fitted values (draws x observations)
fittedvaluesRM <- Xbetasamples + gammasamples + deltasamples
# how is the fit?  (plots the first posterior draw's fitted values only)
plot(fittedvaluesRM[1, ], Y)
# posterior predictive quantities: add observation noise -- one independent
# rnorm vector per sigma draw, so noise varies across observations.
predictiedvaluesRM <- fittedvaluesRM + t(sapply(samplesRM$sigma, function(s) rnorm(length(whichgammas)) * s))
# residuals: pure predictive noise vs observed residuals per draw
residuepredRM <- predictiedvaluesRM - fittedvaluesRM
residueobsRM <- t(apply(fittedvaluesRM, 1, function(x) Y - x))
par(mfrow = c(1, 2))
hist(residueobsRM)
hist(residuepredRM)
# Per-year posterior predictive checks for the regional model: predictive
# means and variances vs the observed values (red lines).
par(mfrow = c(1, 2))
for (j in seq_along(years)) {
  idx <- which(electiondata$year == years[j])
  hist(apply(predictiedvaluesRM[, idx], 1, mean), xlab = "mean", freq = FALSE, main = years[j])
  abline(v = mean(Y[idx]), col = "red")
  hist(apply(predictiedvaluesRM[, idx], 1, var), xlab = "variance", freq = FALSE, main = years[j])
  abline(v = var(Y[idx]), col = "red")
}
# Robustness to priors
# try other priors
# Robustness to Gaussian assumption
# try student t distribution
# Are all the predictors useful?
# What is the probability that predictor j is not useful, i.e. beta_j = 0?